{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Genetic CNN\n",
    "#### CNN architecture exploration using Genetic Algorithm as discussed in the following paper: <a href=\"https://arxiv.org/abs/1703.01513\">Genetic CNN</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Import required libraries \n",
    "1. <a href=\"https://github.com/DEAP/deap\">DEAP</a> for Genetic Algorithm\n",
    "2. <a href=\"https://github.com/thieman/py-dag\"> py-dag</a> for Directed Asyclic Graph (Did few changes for Python 3, check dag.py)\n",
    "3. Tensorflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "\n",
    "from deap import base, creator, tools, algorithms\n",
    "from scipy.stats import bernoulli\n",
    "from dag import DAG, DAGValidationError\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow.examples.tutorials.mnist import input_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mnist = input_data.read_data_sets(\"mnist_data/\", one_hot=True)\n",
    "train_imgs   = mnist.train.images\n",
    "train_labels = mnist.train.labels\n",
    "test_imgs    = mnist.test.images\n",
    "test_labels  = mnist.test.labels\n",
    "\n",
    "train_imgs = np.reshape(train_imgs,[-1,28,28,1])\n",
    "test_imgs = np.reshape(test_imgs,[-1,28,28,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "STAGES = np.array([\"s1\",\"s2\",\"s3\"]) # S\n",
    "NUM_NODES = np.array([3,4,5]) # K\n",
    "\n",
    "L =  0 # genome length\n",
    "BITS_INDICES, l_bpi = np.empty((0,2),dtype = np.int32), 0 # to keep track of bits for each stage S\n",
    "for nn in NUM_NODES:\n",
    "    t = nn * (nn - 1)\n",
    "    BITS_INDICES = np.vstack([BITS_INDICES,[l_bpi, l_bpi + int(0.5 * t)]])\n",
    "    l_bpi = int(0.5 * t)\n",
    "    L += t\n",
    "L = int(0.5 * L)\n",
    "\n",
    "TRAINING_EPOCHS = 20\n",
    "BATCH_SIZE = 20\n",
    "TOTAL_BATCHES = train_imgs.shape[0] // BATCH_SIZE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def weight_variable(weight_name, weight_shape):\n",
    "    return tf.Variable(tf.truncated_normal(weight_shape, stddev = 0.1),name = ''.join([\"weight_\", weight_name]))\n",
    "\n",
    "def bias_variable(bias_name,bias_shape):\n",
    "    return tf.Variable(tf.constant(0.01, shape = bias_shape),name = ''.join([\"bias_\", bias_name]))\n",
    "\n",
    "def linear_layer(x,n_hidden_units,layer_name):\n",
    "    n_input = int(x.get_shape()[1])\n",
    "    weights = weight_variable(layer_name,[n_input, n_hidden_units])\n",
    "    biases = bias_variable(layer_name,[n_hidden_units])\n",
    "    return tf.add(tf.matmul(x,weights),biases)\n",
    "\n",
    "def apply_convolution(x,kernel_height,kernel_width,num_channels,depth,layer_name):\n",
    "    weights = weight_variable(layer_name,[kernel_height, kernel_width, num_channels, depth])\n",
    "    biases = bias_variable(layer_name,[depth])\n",
    "    return tf.nn.relu(tf.add(tf.nn.conv2d(x, weights,[1,2,2,1],padding = \"SAME\"),biases)) \n",
    "\n",
    "def apply_pool(x,kernel_height,kernel_width,stride_size):\n",
    "    return tf.nn.max_pool(x, ksize=[1, kernel_height, kernel_width, 1], \n",
    "            strides=[1, 1, stride_size, 1], padding = \"SAME\")\n",
    "\n",
    "def add_node(node_name, connector_node_name, h = 5, w = 5, nc = 1, d = 1):\n",
    "    with tf.name_scope(node_name) as scope:\n",
    "        conv = apply_convolution(tf.get_default_graph().get_tensor_by_name(connector_node_name), \n",
    "                   kernel_height = h, kernel_width = w, num_channels = nc , depth = d, \n",
    "                   layer_name = ''.join([\"conv_\",node_name]))\n",
    "\n",
    "def sum_tensors(tensor_a,tensor_b,activation_function_pattern):\n",
    "    if not tensor_a.startswith(\"Add\"):\n",
    "        tensor_a = ''.join([tensor_a,activation_function_pattern])\n",
    "        \n",
    "    return tf.add(tf.get_default_graph().get_tensor_by_name(tensor_a),\n",
    "                 tf.get_default_graph().get_tensor_by_name(''.join([tensor_b,activation_function_pattern])))\n",
    "\n",
    "def has_same_elements(x):\n",
    "    return len(set(x)) <= 1\n",
    "\n",
    "'''This method will come handy to first generate DAG independent of Tensorflow, \n",
    "    afterwards generated graph can be used to generate Tensorflow graph'''\n",
    "def generate_dag(optimal_indvidual,stage_name,num_nodes):\n",
    "    # create nodes for the graph\n",
    "    nodes = np.empty((0), dtype = np.str)\n",
    "    for n in range(1,(num_nodes + 1)):\n",
    "        nodes = np.append(nodes,''.join([stage_name,\"_\",str(n)]))\n",
    "    \n",
    "    # initialize directed asyclic graph (DAG) and add nodes to it\n",
    "    dag = DAG()\n",
    "    for n in nodes:\n",
    "        dag.add_node(n)\n",
    "\n",
    "    # split best indvidual found via GA to identify vertices connections and connect them in DAG \n",
    "    edges = np.split(optimal_indvidual,np.cumsum(range(num_nodes - 1)))[1:]\n",
    "    v2 = 2\n",
    "    for e in edges:\n",
    "        v1 = 1\n",
    "        for i in e:\n",
    "            if i:\n",
    "                dag.add_edge(''.join([stage_name,\"_\",str(v1)]),''.join([stage_name,\"_\",str(v2)])) \n",
    "            v1 += 1\n",
    "        v2 += 1\n",
    "\n",
    "    # delete nodes not connected to anyother node from DAG\n",
    "    for n in nodes:\n",
    "        if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:\n",
    "            dag.delete_node(n)\n",
    "            nodes = np.delete(nodes, np.where(nodes == n)[0][0])\n",
    "    \n",
    "    return dag, nodes\n",
    "\n",
    "def generate_tensorflow_graph(individual,stages,num_nodes,bits_indices):\n",
    "    activation_function_pattern = \"/Relu:0\"\n",
    "    \n",
    "    tf.reset_default_graph()\n",
    "    X = tf.placeholder(tf.float32, shape = [None,28,28,1], name = \"X\")\n",
    "    Y = tf.placeholder(tf.float32,[None,10],name = \"Y\")\n",
    "        \n",
    "    d_node = X\n",
    "    for stage_name,num_node,bpi in zip(stages,num_nodes,bits_indices):\n",
    "        indv = individual[bpi[0]:bpi[1]]\n",
    "\n",
    "        add_node(''.join([stage_name,\"_input\"]),d_node.name)\n",
    "        pooling_layer_name = ''.join([stage_name,\"_input\",activation_function_pattern])\n",
    "\n",
    "        if not has_same_elements(indv):\n",
    "            # ------------------- Temporary DAG to hold all connections implied by GA solution ------------- #  \n",
    "\n",
    "            # get DAG and nodes in the graph\n",
    "            dag, nodes = generate_dag(indv,stage_name,num_node) \n",
    "            # get nodes without any predecessor, these will be connected to input node\n",
    "            without_predecessors = dag.ind_nodes() \n",
    "            # get nodes without any successor, these will be connected to output node\n",
    "            without_successors = dag.all_leaves()\n",
    "\n",
    "            # ----------------------------------------------------------------------------------------------- #\n",
    "\n",
    "            # --------------------------- Initialize tensforflow graph based on DAG ------------------------- #\n",
    "\n",
    "            for wop in without_predecessors:\n",
    "                add_node(wop,''.join([stage_name,\"_input\",activation_function_pattern]))\n",
    "\n",
    "            for n in nodes:\n",
    "                predecessors = dag.predecessors(n)\n",
    "                if len(predecessors) == 0:\n",
    "                    continue\n",
    "                elif len(predecessors) > 1:\n",
    "                    first_predecessor = predecessors[0]\n",
    "                    for prd in range(1,len(predecessors)):\n",
    "                        t = sum_tensors(first_predecessor,predecessors[prd],activation_function_pattern)\n",
    "                        first_predecessor = t.name\n",
    "                    add_node(n,first_predecessor)\n",
    "                elif predecessors:\n",
    "                    add_node(n,''.join([predecessors[0],activation_function_pattern]))\n",
    "\n",
    "            if len(without_successors) > 1:\n",
    "                first_successor = without_successors[0]\n",
    "                for suc in range(1,len(without_successors)):\n",
    "                    t = sum_tensors(first_successor,without_successors[suc],activation_function_pattern)\n",
    "                    first_successor = t.name\n",
    "                add_node(''.join([stage_name,\"_output\"]),first_successor) \n",
    "            else:\n",
    "                add_node(''.join([stage_name,\"_output\"]),''.join([without_successors[0],activation_function_pattern])) \n",
    "\n",
    "            pooling_layer_name = ''.join([stage_name,\"_output\",activation_function_pattern])\n",
    "            # ------------------------------------------------------------------------------------------ #\n",
    "\n",
    "        d_node =  apply_pool(tf.get_default_graph().get_tensor_by_name(pooling_layer_name), \n",
    "                                 kernel_height = 16, kernel_width = 16,stride_size = 2)\n",
    "\n",
    "    shape = d_node.get_shape().as_list()\n",
    "    flat = tf.reshape(d_node, [-1, shape[1] * shape[2] * shape[3]])\n",
    "    logits = linear_layer(flat,10,\"logits\")\n",
    "    \n",
    "    xentropy =  tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = Y)\n",
    "    loss_function = tf.reduce_mean(xentropy)\n",
    "    optimizer = tf.train.AdamOptimizer().minimize(loss_function) \n",
    "    accuracy = tf.reduce_mean(tf.cast( tf.equal(tf.argmax(tf.nn.softmax(logits),1), tf.argmax(Y,1)), tf.float32))\n",
    "    \n",
    "    return  X, Y, optimizer, loss_function, accuracy\n",
    "\n",
    "def evaluateModel(individual):\n",
    "    score = 0.0\n",
    "    X, Y, optimizer, loss_function, accuracy = generate_tensorflow_graph(individual,STAGES,NUM_NODES,BITS_INDICES)\n",
    "    with tf.Session() as session:\n",
    "        tf.global_variables_initializer().run()\n",
    "        for epoch in range(TRAINING_EPOCHS):\n",
    "            for b in range(TOTAL_BATCHES):\n",
    "                offset = (epoch * BATCH_SIZE) % (train_labels.shape[0] - BATCH_SIZE)\n",
    "                batch_x = train_imgs[offset:(offset + BATCH_SIZE), :, :, :]\n",
    "                batch_y = train_labels[offset:(offset + BATCH_SIZE), :]\n",
    "                _, c = session.run([optimizer, loss_function],feed_dict={X: batch_x, Y : batch_y})\n",
    "                \n",
    "        score = session.run(accuracy, feed_dict={X: test_imgs, Y: test_labels})\n",
    "        #print('Accuracy: ',score)\n",
    "    return score,"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "population_size = 20\n",
    "num_generations = 3\n",
    "\n",
    "creator.create(\"FitnessMax\", base.Fitness, weights = (1.0,))\n",
    "creator.create(\"Individual\", list , fitness = creator.FitnessMax)\n",
    "\n",
    "toolbox = base.Toolbox()\n",
    "toolbox.register(\"binary\", bernoulli.rvs, 0.5)\n",
    "toolbox.register(\"individual\", tools.initRepeat, creator.Individual, toolbox.binary, n = L)\n",
    "toolbox.register(\"population\", tools.initRepeat, list , toolbox.individual)\n",
    "\n",
    "toolbox.register(\"mate\", tools.cxOrdered)\n",
    "toolbox.register(\"mutate\", tools.mutShuffleIndexes, indpb = 0.8)\n",
    "toolbox.register(\"select\", tools.selRoulette)\n",
    "toolbox.register(\"evaluate\", evaluateModel)\n",
    "\n",
    "popl = toolbox.population(n = population_size)\n",
    "result = algorithms.eaSimple(popl, toolbox, cxpb = 0.4, mutpb = 0.05, ngen = num_generations, verbose = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# print top-3 optimal solutions \n",
    "best_individuals = tools.selBest(popl, k = 3)\n",
    "for bi in best_individuals:\n",
    "    print(bi)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "--------------------------------------------------------------------------------------------------------------------"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
